#!/usr/bin/python
#-*-coding:utf-8-*-

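'''
Batch-process pictures.
Sort the pictures into place, generate picture links, titles and keywords,
and generate the corresponding SQL statements.
'''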

import os
import json
import jieba
from urllib import quote
import hashlib
import time

From_dir = 'G:/From'
To_dir = 'G:/To'
pic_url_pre = 'http://www.shenyou.tv/uploadfile/2016/'

# 生成新图片名称，取消使用urlencode方式，防止爬取
def getNewFileName(current_id, file_name):
	format_pos = file_name.find('.')
	pic_format = file_name[format_pos:]
	md5 = hashlib.md5()
	dir_path = str(int(current_id) % 256)
	md5.update(file_name[0:format_pos])
	return current_id + md5.hexdigest()[0:15] + pic_format

# os库中没有copyfile,自己简单写一个
def moveFile(from_dir, file_name, to_dir, new_file_name):
	full_name = os.path.join(from_dir, file_name)
	new_full_name = os.path.join(to_dir, new_file_name)
	data = ''
	with open(full_name, 'rb') as f:
		data = f.read()
	with open(new_full_name, 'wb') as f:
		f.write(data)

# 调用结巴分词，获取keyword
def getKeyword(title):
	#基本停用词
	#stop_words = ["的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都", "一", "一个", "上", "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好", "自己", "这"]
	#stop_words = [line.strip() for line in open('stopword.txt').readlines() ]
	seg_list = jieba.cut(title, cut_all=False)
	#return ' '.join(list(set(seg_list)-set(stop_words)))
	return ' '.join(seg_list)

def _sqlEscape(value):
	"""Double single quotes so value can sit inside a '...' SQL string literal."""
	return value.replace("'", "''")


def _writeGalleryJson(gallery_id, pictures):
	"""Write one gallery's picture list to <To_dir>/<gallery_id>.json."""
	# The JSON file is converted to its final format by PHP afterwards.
	with open(os.path.join(To_dir, gallery_id + '.json'), 'w') as f:
		f.write(json.dumps(pictures))


# Script entry point.
#
# Every regular file in From_dir is expected to be named
# <number>_<title>_<anything>: the text before the first '_' is a number and
# the text between the first and the last '_' is the gallery title.
# Consecutive files with the same title form one gallery. For each gallery:
#   * one INSERT statement is appended to <To_dir>/pic_insert.sql,
#   * the pictures are copied to <To_dir>/uploadfile/<id % 256>/,
#   * the picture URLs are written to <To_dir>/<id>.json.
#
# NOTE: the byte-string handling below (str.decode, concatenating encoded
# bytes into the SQL text) relies on Python 2 string semantics.
if __name__ == "__main__":
	pic_list = []
	# None (not '') so that even an empty title starts a gallery on the first file.
	current = None
	current_id = ''
	dir_path = ''
	copy_path = ''
	with open(os.path.join(To_dir, 'pic_insert.sql'), 'a+') as f_sql:
		# os.listdir returns names in arbitrary order; sorting keeps the
		# files of one gallery (same "<number>_<title>_" prefix) adjacent.
		for file_name in sorted(os.listdir(From_dir)):
			full_name = os.path.join(From_dir, file_name)
			if not os.path.isfile(full_name):
				continue
			print(full_name)
			start_pos = file_name.find('_')
			end_pos = file_name.rfind('_')
			# Two distinct underscores and a numeric prefix are required;
			# anything else (e.g. Thumbs.db) is skipped instead of aborting.
			if start_pos < 0 or end_pos <= start_pos:
				print('skipped, unexpected file name: ' + full_name)
				continue
			try:
				source_id = int(file_name[0:start_pos])
			except ValueError:
				print('skipped, unexpected file name: ' + full_name)
				continue
			title = file_name[start_pos+1:end_pos]
			if current != title:
				# A new gallery starts: flush the previous gallery's links.
				# current_id still holds the previous gallery's id here.
				if pic_list:
					_writeGalleryJson(current_id, pic_list)
					pic_list = []
				current = title
				current_id = str(source_id + 495)
				# Pictures are spread over sub-folders named id % 256.
				dir_path = str(int(current_id) % 256)
				copy_path = os.path.join(To_dir, 'uploadfile', dir_path)
				# makedirs also creates a missing 'uploadfile' parent.
				if not os.path.isdir(copy_path):
					os.makedirs(copy_path)
				key_word = getKeyword(current)
				# The gallery thumbnail is its first picture.
				thumb = pic_url_pre + dir_path + '/' + getNewFileName(current_id, file_name)
				# ids start from 1
				page_id = str(int(current_id) + 1)
				page_url = 'http://tu.shenyou.tv/index.php?m=content&c=index&a=show&catid=25&id=' + page_id
				inputtime = str(int(time.time()))
				# Values taken from file names are quote-escaped so that an
				# apostrophe in a title cannot break the statement.
				sql = ("INSERT INTO `shenyou`.`sy_pic` (`id`, `catid`, `typeid`, `title`, `style`, `thumb`, `keywords`, `description`, `posids`, `url`, `listorder`, `status`, `sysadd`, `islink`, `username`, `inputtime`, `updatetime`) VALUES ('"
					+ page_id + "', '25', '0', '" + _sqlEscape(current) + "', '', '"
					+ _sqlEscape(thumb) + "', '" + _sqlEscape(key_word.encode('gbk')) + "', '', '0', '"
					+ page_url + "', '0', '1', '1', '0', 'admin', '" + inputtime + "', '0');")
				f_sql.write(sql + '\n')
			# New file name and public URL of this picture.
			new_file_name = getNewFileName(current_id, file_name)
			url = pic_url_pre + dir_path + '/' + new_file_name
			# The title bytes are decoded as GBK for the JSON output.
			pic_list.append({'url': url, 'alt': title.decode('gbk')})
			# Copy the picture to its target folder.
			moveFile(From_dir, file_name, copy_path, new_file_name)
		# The last gallery has no following title change, so flush it here.
		if pic_list:
			_writeGalleryJson(current_id, pic_list)

		
